* This dofile constructs the final dataset "fninvest_experience.dta" and "figuredata.csv" for analysis.
* File written by Kazuma Takakura kazuma.arukakat@gmail.com (University of Tokyo)
* Last updated on April 27, 2024

set more off

*** data on resettle reason ***
* Import the resettlement-reason workbook (first row = variable names), keep
* rows with a subject id, and derive loss indicators and total compensation.
import excel using "$path_data/resettle_reason.xls", firstrow clear
keep if subj_id != .
local keepvars subj_id house land workloss house_compensation land_compensation workloss_job
keep `keepvars'
destring house house_compensation land_compensation, replace

* landloss copies land, but is forced to 1 whenever a land compensation is recorded
gen landloss = cond(land_compensation != ., 1, land)
* a recorded house compensation / a non-empty lost-job text likewise imply the loss
replace house = 1 if house_compensation != .
replace workloss = 1 if workloss_job != ""

* missing compensation amounts count as zero before they are added up
foreach amt of varlist house_compensation land_compensation {
    replace `amt' = 0 if `amt' == .
}
gen compensation = house_compensation + land_compensation
save "$path_data/resettle_reason.dta", replace

*** data on household information ***
* Convert the household-member workbook to .dta, keeping only the id and the
* household composition variables listed below.
import excel using "$path_data/hh_member.xls", firstrow clear
local hhvars hh_working_age_1984 hh_male_1984 hh_female_1984 hh_no_sex_info hh_child_1984
keep subj_id `hhvars'
save "$path_data/hh_member.dta", replace

*** data on succession of land ***
* Build land_inherit: 1 when the four inherited-land amounts sum to a positive
* number, 0 otherwise. Missing amounts are first set to zero.
import excel using "$path_data/land_inherit.xls", firstrow clear
foreach plot of varlist inherit_* {
    replace `plot' = 0 if `plot' == .
}
gen land_inherit = (inherit_rice + inherit_garden + inherit_dry + inherit_other > 0)
keep subj_id land_inherit inherit_*
save "$path_data/land_inherit.dta", replace


*** data on behavioral components ***
use "$path_data/panel_data3.dta", clear

* make subj_id: rows 1-16 map to id 1, rows 17-32 to id 2, and so on
gen subj_id = ceil(row/16)

* make hyperbolic discount dummy
* dis_aXY = 1 when question X of series a was answered "a" and question Y "b".
* The first question of series a is stored as disratea1; the others are disrata2-disrata7.
gen dis_a12 = (disratea1=="a" & disrata2=="b")
forvalues q = 2/6 {
    local nxt = `q' + 1
    gen dis_a`q'`nxt' = (disrata`q'=="a" & disrata`nxt'=="b")
}
* dis_aerror = 1 when none of the six a->b switches above occurred
gen dis_aerror = (dis_a12 + dis_a23 + dis_a34 + dis_a45 + dis_a56 + dis_a67 == 0)
* dis_ahighdisc = 1 when all seven series-a answers are "a"
tempvar n_a
gen `n_a' = (disratea1=="a")
forvalues q = 2/7 {
    replace `n_a' = `n_a' + (disrata`q'=="a")
}
gen dis_ahighdisc = (`n_a' == 7)
drop `n_a'

* same construction for series b (questions disratb1-disratb6)
forvalues q = 1/5 {
    local nxt = `q' + 1
    gen dis_b`q'`nxt' = (disratb`q'=="a" & disratb`nxt'=="b")
}
gen dis_berror = (dis_b12 + dis_b23 + dis_b34 + dis_b45 + dis_b56 == 0)
tempvar n_b
gen `n_b' = 0
forvalues q = 1/6 {
    replace `n_b' = `n_b' + (disratb`q'=="a")
}
gen dis_bhighdisc = (`n_b' == 6)
drop `n_b'


* futurebiased = 1 when every answer from question 3 onward is "b" in both
* series (a3-a7 and b3-b6: nine answers in total)
tempvar n_late_b
gen `n_late_b' = 0
forvalues q = 3/7 {
    replace `n_late_b' = `n_late_b' + (disrata`q'=="b")
}
forvalues q = 3/6 {
    replace `n_late_b' = `n_late_b' + (disratb`q'=="b")
}
gen futurebiased = (`n_late_b' == 9)
drop `n_late_b'

* dis_inconsistent = 1 when a series has two or more a->b switches ...
gen dis_inconsistent = (dis_a12+dis_a23+dis_a34+dis_a45+dis_a56+dis_a67 >= 2) | (dis_b12+dis_b23+dis_b34+dis_b45+dis_b56 >= 2)
* ... or when any adjacent pair of answers goes from "b" back to "a"
replace dis_inconsistent = 1 if disratea1=="b" & disrata2=="a"
forvalues q = 2/6 {
    local nxt = `q' + 1
    replace dis_inconsistent = 1 if disrata`q'=="b" & disrata`nxt'=="a"
}
forvalues q = 1/5 {
    local nxt = `q' + 1
    replace dis_inconsistent = 1 if disratb`q'=="b" & disratb`nxt'=="a"
}

* Point values for the two discount measures, assigned from the switching
* dummies. The replace statements run in order, so when several dummies are 1
* for the same row the last matching line wins.
gen disc7 = ln(0.9)/7 if dis_a12 == 1
replace disc7 = ln(1.1)/7 if dis_a23 ==1
replace disc7 = (ln(1.1)+ln(1.2))/14 if dis_a34 ==1
replace disc7 = (ln(1.2)+ln(1.4))/14 if dis_a45 ==1
replace disc7 = (ln(1.4)+ln(1.6))/14 if dis_a56 ==1
replace disc7 = (ln(1.6)+ln(2))/14 if dis_a67 ==1
* rows without any series-a switch
replace disc7 = ln(2)/14 if dis_aerror ==1
gen disc12 = ln(0.9)/12 if dis_b12 == 1
replace disc12 = ln(1.4)/12 if dis_b23 ==1
replace disc12 = (ln(1.4)+ln(2))/24 if dis_b34 ==1
replace disc12 = (ln(2)+ln(3))/24 if dis_b45 ==1
replace disc12 = (ln(3)+ln(4))/24 if dis_b56 ==1
* rows without any series-b switch
replace disc12 = ln(4)/24 if dis_berror ==1
* hyper00 = 1 when the series-a value exceeds the series-b value.
* disc7 and disc12 are assigned on every row (a switch dummy or the *error
* dummy is always 1), so no missing-value guard is needed here.
gen hyper00 = 1 if disc7>disc12
recode hyper00(.=0)

* Interval version: disc7u/disc7d and disc12u/disc12d hold the two bracket
* ends implied by the switch point. They stay missing for rows with no switch.
gen disc7u = ln(0.9)/7 if dis_a12 == 1
gen disc7d = 0 if dis_a12==1
replace disc7u = ln(1.1)/7 if dis_a23 ==1
replace disc7d = ln(0.9)/7 if dis_a23 == 1
replace disc7u = ln(1.2)/7 if dis_a34 ==1
replace disc7d = ln(1.1)/7 if dis_a34 ==1
replace disc7u = ln(1.4)/7 if dis_a45 ==1
replace disc7d = ln(1.2)/7 if dis_a45 ==1
replace disc7u = ln(1.6)/7 if dis_a56 ==1
replace disc7d = ln(1.4)/7 if dis_a56 ==1
replace disc7u = ln(2)/7 if dis_a67 ==1
replace disc7d = ln(1.6)/7 if dis_a67 ==1
gen disc12u = ln(0.9)/12 if dis_b12 == 1
gen disc12d = 0 if dis_b12 == 1
replace disc12u = ln(1.4)/12 if dis_b23 ==1
replace disc12d = ln(0.9)/12 if dis_b23 ==1
replace disc12u = ln(2)/12 if dis_b34 ==1
replace disc12d = ln(1.4)/12 if dis_b34 ==1
replace disc12u = ln(3)/12 if dis_b45 ==1
replace disc12d = ln(2)/12 if dis_b45 ==1
replace disc12u = ln(4)/12 if dis_b56 ==1
replace disc12d = ln(3)/12 if dis_b56 ==1

* hyper00_2 = 1 when disc7d exceeds disc12u.
* Stata ranks missing above every number, so without the guard a row with no
* series-a switch (disc7d missing) but a defined disc12u would be flagged 1.
* Rows where either bound is undefined are now left at 0.
gen hyper00_2 = 1 if disc7d>disc12u & !missing(disc7d, disc12u)
recode hyper00_2(.=0)


* highdisc00 = 1 when the respondent answered "a" throughout both series
gen highdisc00 = 1 if dis_ahighdisc==1&dis_bhighdisc==1
recode highdisc00(.=0)
* Replace the credit variables with those from 2000_credit.dta and build the
* credit-constraint measures cc00 (row level) and cc90 / cc85 (value of cc00
* in 1990 / 1985, copied to every row of the respondent).
drop ccstatus ccstatusp ccstatusf caim capplied caccepted
merge 1:1 subj_id yr using "$path_data/2000_credit.dta", keep(master match) nogenerate

* cc00_1 from the credit file takes over the name ccdummy1
drop ccdummy1
rename cc00_1 ccdummy1

* cc00missing flags rows without a credit answer; cc00 is the answer with missing set to 0
gen cc00missing = (ccdummy1 == .)
gen cc00 = cond(ccdummy1 == ., 0, ccdummy1)

foreach y in 90 85 {
    tempvar pick
    * value of cc00 on the 19`y' row only; other rows are missing and ignored by sum()
    gen `pick' = cc00 if yr == 19`y'
    bysort subj_id: egen cc`y' = sum(`pick')
    drop `pick'
}
save "$path_data/2000_data.dta", replace

*** data on floating net investment ***
use "$path_data/2nd_round_data_raw.dta", clear

rename subj_id2000 subj_id

* fninvest-experience
* Year windows used below, written as conditions on yr
local c8500 "yr<2001"
local c8597 "yr<1998"
local c9810 "yr>1997"
local c9000 "yr<2001 & yr>1989"
local c0010 "yr>1999"
local c85   "yr==1985"
local c84   "yr==1984"
local c8590 "yr>1984 & yr<1991"

* fn84 / fn85 / fn8590: respondent total of fninvest inside the window, capped at 1 when positive
foreach w in 84 85 8590 {
    tempvar part
    gen `part' = (`c`w'') * fninvest
    bysort subj_id: egen fn`w' = total(`part')
    replace fn`w' = 1 if fn`w' > 0
    drop `part'
}

* fninvest_ex_*: respondent total of fninvest inside each window
foreach w in 8500 9000 0010 8597 9810 {
    tempvar part
    gen `part' = (`c`w'') * fninvest
    bysort subj_id: egen fninvest_ex_`w' = total(`part')
    drop `part'
}

* success90 / success85: total_success of the 1990 / 1985 row, copied to every row of the respondent
foreach y in 90 85 {
    tempvar hit
    gen `hit' = (yr==19`y') * total_success
    bysort subj_id: egen success`y' = sum(`hit')
    drop `hit'
}


keep subj_id yr fninvest_ex_9000 fninvest_ex_8500 fninvest_ex_8597 fninvest_ex_9810 fninvest_ex_0010 fn84 fn85 fn8590 success90 success85
save "$path_data/fninvest_experience.dta", replace

*** data on risk preference ***
use "$path_data/panel0010.dta", clear
drop _merge

* risk_k takes the 2000 game variable on yr==2000 rows and the 2010 variable on
* yr==2010 rows; anything still missing is set to 0.
* NOTE(review): risk_2 reads crra5g1 for 2010, the same source as risk_1. This
* looks like a copy-paste slip (a game-2 variable was probably intended) -
* confirm against the 2010 data before changing.
local src2000 game002 game003 game004
local src2010 crra5g1 crra5g1 crra5_g3
forvalues k = 1/3 {
    local v00 : word `k' of `src2000'
    local v10 : word `k' of `src2010'
    gen risk_`k' = cond(yr==2010, `v10', cond(yr==2000, `v00', .))
    replace risk_`k' = 0 if risk_`k' == .
}

* 2010 age = respondent total of age_old + 10; 2010 household size = respondent total of numhh
bysort subj_id: egen age_old_0010 = sum(age_old)
replace age_old = age_old_0010 + 10 if yr==2010
bysort subj_id: egen numhh_0010 = sum(numhh)
replace numhh = numhh_0010 if yr==2010

gen age_old2 = age_old^2
replace riceland = riceland/1000
gen riceland2 = riceland^2
rename cc00_1 cc10

* attach the floating-net experience measures; keep matched rows only
merge 1:1 subj_id yr using "$path_data/fninvest_experience.dta", keep(match) nogenerate
save "$path_data/fninvest_experience.dta", replace


*** data on time preference in 2010 survey ***
* Import the 2010 discounting answers and make the id and answer columns numeric.
import excel using "$path_data/discount2010.xlsx", firstrow clear
foreach col of varlist subj_id disc1q21 disc1q22 disc3q9 disc3q10 {
    destring `col', replace
}
save "$path_data/discount2010.dta", replace


*** data merge ***
use "$path_data/fninvest_experience.dta", clear
merge m:1 subj_id using "$path_data/discount2010.dta", nogenerate

* 2010 discounting indicators built from the answer codes 1 / 2
gen hyper10 = (disc1q21==1 & disc3q9==2) | (disc1q22==1 & disc3q10==2)
* hyper10_1 uses exactly the same definition as hyper10
gen hyper10_1 = (disc1q21==1 & disc3q9==2) | (disc1q22==1 & disc3q10==2)
gen hyper10_2 = (disc1q22==1 & disc3q9==2)

gen highdisc10 = (disc1q21==1 & disc3q9==1 & disc1q22==1 & disc3q10==1)


* 2000 behavioral data: rows that exist only in the using file are not kept
merge 1:1 subj_id yr using "$path_data/2000_data.dta", force keep(master match) nogenerate

* respondent-level files: all rows are kept, including unmatched ones from the using file
merge m:1 subj_id using "$path_data/resettle_reason.dta", force nogenerate

merge m:1 subj_id using "$path_data/land_inherit.dta", force nogenerate

* houseland: land == 1 without inherited land, or house == 1
gen houseland = (land == 1 & land_inherit == 0) | (house == 1)

* house_or_land: exactly one of land / house is 1 while the other is 0
gen house_or_land = (land == 1 & house == 0) | (house == 1 & land == 0)

* land_all: land == 1 without inherited land
gen land_all = (land == 1 & land_inherit == 0)

* 2000 dummy
* dum00 is 1 on yr==2000 rows and missing elsewhere, so dum00*x is x on the
* 2000 row and missing otherwise; egen sum() ignores the missing rows and
* copies the 2000 value to every row of the respondent (x00 variables).
* For the x10 variables the 2000 row is zeroed first, so the respondent sum
* is whatever the remaining rows hold.
gen dum00 = 1 if yr==2000
gen risk1_0=dum00*risk_1
bys subj_id : egen risk00_1=sum(risk1_0)
drop risk1_0
gen risk2_0 = dum00*risk_2
bys subj_id : egen risk00_2=sum(risk2_0)
drop risk2_0
gen risk3_0 = dum00*risk_3
bys subj_id : egen risk00_3=sum(risk3_0)
drop risk3_0
replace risk_1 = 0 if yr==2000
bys subj_id : egen risk10_1=sum(risk_1)
drop risk_1
replace risk_2 = 0 if yr==2000
bys subj_id : egen risk10_2=sum(risk_2)
drop risk_2
replace risk_3 = 0 if yr==2000
bys subj_id : egen risk10_3=sum(risk_3)
drop risk_3
gen cc_0 = dum00*cc00
bys subj_id : egen cc00_00=sum(cc_0)
gen cc_1 = dum00*cc10
bys subj_id : egen cc10_00=sum(cc_1)
drop cc_0 cc_1
gen success_0 = dum00*total_success
bys subj_id : egen success00=sum(success_0)
drop success_0
replace total_success = 0 if yr==2000
bys subj_id : egen success10=sum(total_success)
drop total_success
gen schultz_0 = dum00*schultz
bys subj_id : egen schultz00=sum(schultz_0)
drop schultz_0
replace schultz = 0 if yr==2000
bys subj_id : egen schultz10=sum(schultz)
drop schultz
gen age_old_0 = dum00*age_old
bys subj_id : egen age_old_00=sum(age_old_0)
drop age_old_0
gen age_old2_00 = age_old_00^2
* running sum within (yr, subj_id); cc00 is replaced by cc00_00 accumulated over that group
bys yr subj_id : gen cc00_0 =  sum(cc00_00)
drop cc00
rename cc00_0 cc00

* The statements below use -gen ... = sum()-, which is a RUNNING sum (unlike
* -egen ... sum()-), so the result depends on the row order inside subj_id.
* That order was previously left to an unstable re-sort; it is now pinned to
* ascending yr with -bys subj_id (yr)- so every run gives the same values.
* A missing input counts as 0 in the running sum.
bys subj_id (yr) : gen cc00missing_0 =  sum(cc00missing)
drop cc00missing
rename cc00missing_0 cc00missing
bys subj_id (yr) : gen hyper00_0 =  sum(hyper00)
drop hyper00
rename hyper00_0 hyper00
bys subj_id (yr) : gen hyper00_22 =  sum(hyper00_2)
drop hyper00_2
rename hyper00_22 hyper00_2
bys subj_id (yr) : gen highdisc00_0 =  sum(highdisc00)
drop highdisc00
rename highdisc00_0 highdisc00
bys subj_id (yr) : gen dis_inconsistent_0 =  sum(dis_inconsistent)
drop dis_inconsistent
rename dis_inconsistent_0 dis_inconsistent00
* interaction of past success with high_edu, accumulated in yr order
gen schultz_9 = success90*high_edu
bys subj_id (yr) : gen schultz90 = sum(schultz_9)
drop schultz_9
gen schultz_8 = success85*high_edu
bys subj_id (yr) : gen schultz85 = sum(schultz_8)
drop schultz_8
* cc85 / cc90 are replaced by their respondent totals
bys subj_id : egen cc85total=total(cc85)
drop cc85
rename cc85total cc85
bys subj_id : egen cc90total=total(cc90)
drop cc90
rename cc90total cc90
bys subj_id (yr) : gen fnunit00=sum(fnunit_0)
drop fnunit_0
* risk missing
* risk00_missing = number of yr==2000 rows of the respondent with game002 missing
gen risk00_missing_temp = 1 if (yr == 2000) & (game002 == .)
bysort subj_id: egen risk00_missing = sum(risk00_missing_temp)


* standardization
* Each <var>std is (var - mean) / sd. The first groups use the mean and sd
* within yr; the last group uses the mean and sd over all rows.

* 2010 risk measures, within yr
foreach v in risk10_1 risk10_2 risk10_3 {
    tempvar mu sigma
    bysort yr: egen `mu' = mean(`v')
    bysort yr: egen `sigma' = sd(`v')
    gen `v'std = (`v' - `mu') / `sigma'
    drop `mu' `sigma'
}

* 2000 risk measures, within yr: blanked where the 2000 answer is flagged
* missing, standardized, and the standardized value then set to 0 on those rows
forvalues k = 1/3 {
    replace risk00_`k' = . if risk00_missing == 1
    tempvar mu sigma
    bysort yr: egen `mu' = mean(risk00_`k')
    bysort yr: egen `sigma' = sd(risk00_`k')
    gen risk00_`k'std = (risk00_`k' - `mu') / `sigma'
    drop `mu' `sigma'
    replace risk00_`k'std = 0 if risk00_`k' == .

}
* credit measures, within yr (their mean and sd variables stay in the data)
foreach v in cc00 cc10 {
    bysort yr: egen `v'mean = mean(`v')
    bysort yr: egen `v'sd = sd(`v')
    gen `v'std = (`v' - `v'mean) / `v'sd
}

* success and schultz measures, within yr
foreach v in success00 success10 schultz00 schultz10 {
    tempvar mu sigma
    bysort yr: egen `mu' = mean(`v')
    bysort yr: egen `sigma' = sd(`v')
    gen `v'std = (`v' - `mu') / `sigma'
    drop `mu' `sigma'
}

* remaining measures, over all rows (no by-group)
foreach v in hyper00 hyper00_2 hyper10 hyper10_1 hyper10_2 highdisc10 highdisc00 success90 success85 cc90 cc85 schultz90 schultz85 {
    tempvar mu sigma
    egen `mu' = mean(`v')
    egen `sigma' = sd(`v')
    gen `v'std = (`v' - `mu') / `sigma'
    drop `mu' `sigma'
}


* age missing & outlier
* Ages below 20 are flagged in age_missing and age_old is set to 0 for them.
* NOTE(review): a truly missing age_old is not < 20 in Stata, so it is not
* flagged here; it is only zeroed by the recode near the end of this section.
gen age_missing = 1 if age_old<20
recode age_missing(.=0)
replace age_old=0 if age_missing == 1
replace age_old2 = age_old^2

* num of HH missing
*replace numhh = 13 if subj_id == 181
* NOTE(review): only numhh==0 is flagged; a missing numhh is not - confirm intent.
gen numhh_missing = 1 if numhh==0
recode numhh_missing (.=0)

* village missing
replace village = 0 if subj_id == 309

* FN experience dummy
* Keep one cross-section (the 2010 rows are dropped) and turn the experience
* totals into 0/1 indicators.
drop if yr==2010
drop yr yr1
* Stata ranks missing above every number, so "x>0" alone is true for a missing
* total. Rows added by the earlier unmatched m:1 merges can carry missing
* totals; the !missing() guard keeps them at 0 instead of 1.
gen fn8500=1 if fninvest_ex_8500>0 & !missing(fninvest_ex_8500)
gen fn0010=1 if fninvest_ex_0010>0 & !missing(fninvest_ex_0010)
gen fn9000=1 if fninvest_ex_9000>0 & !missing(fninvest_ex_9000)
gen fn8597=1 if fninvest_ex_8597>0 & !missing(fninvest_ex_8597)
gen fn9810=1 if fninvest_ex_9810>0 & !missing(fninvest_ex_9810)
recode fn8500 fn9000 fn0010 fn8597 fn9810(.=0)

* age dummy (ten-year bands; upper bound exclusive)
gen age20s = 1 if age_old>=20 & age_old<30
gen age30s = 1 if age_old>=30 & age_old<40
gen age40s = 1 if age_old>=40 & age_old<50
gen age50s = 1 if age_old>=50 & age_old<60
gen age60s = 1 if age_old>=60 & age_old<70
gen age70s = 1 if age_old>=70 & age_old<80
gen age80s = 1 if age_old>=80 & age_old<90

* Number of HH members dummy
* numhh1013 has no upper bound, so a missing numhh (which ranks above any
* number) also satisfies numhh >= 10 at this point.
gen numhh13 = 1 if numhh >= 1 & numhh < 4
gen numhh46 = 1 if numhh >= 4 & numhh < 7
gen numhh79 = 1 if numhh >= 7 & numhh < 10
gen numhh1013 = 1 if numhh >= 10
gen numhh2 = numhh^2


* every variable starting with age or numhh gets missing replaced by 0
* (this turns the unset band dummies into 0)
recode age* numhh* (.=0)


save "$path_data/fninvest_experience.dta", replace


*** data on CPI ***
* Convert the CPI workbook (first row = variable names) to Stata format.
import excel using "$path_data/cpi_8500.xlsx", firstrow clear
save "$path_data/cpi.dta", replace

*** data on income ***
* Import the two income-change answers and convert their codes 1-6 to the
* percentage values listed below.
import excel using "$path_data/20120906_Complete_Rekap_test_11Feb2010-rev1-rev yrk.xlsx", firstrow clear
keep IDRespondent ComparedtobeforeSagulingD CL
rename (IDRespondent ComparedtobeforeSagulingD CL) (subj_id incomechange incomechange_cri)
keep if subj_id != .

* incomeup / incomeup2: 1 when the answer code is below 4, otherwise 0
gen incomeup = (incomechange < 4)
gen incomeup2 = (incomechange_cri < 4)

* code -> percent change: 1=50, 2=37.5, 3=17.5, 4=0, 5=-17.5, 6=-37.5; other codes stay missing
local answers incomechange incomechange_cri
local percents incomechange_8510 incomechange_8597
forvalues i = 1/2 {
    local src : word `i' of `answers'
    local dst : word `i' of `percents'
    gen `dst' = cond(`src'==1, 50, cond(`src'==2, 37.5, cond(`src'==3, 17.5, cond(`src'==4, 0, cond(`src'==5, -17.5, cond(`src'==6, -37.5, .))))))
}

* change implied by the ratio of the two growth factors above
gen incomechange_9810 = ((1+incomechange_8510/100)/(1+incomechange_8597/100)-1)*100
save "$path_data/incomechange.dta", replace


*** data on FN investment ***
* Builds yearly floating-net income statistics, split by whether the
* respondent continued floating-net investment, and exports them as
* figuredata.csv.
* -use- accepts -clear-, not -replace-; the original option is rejected by Stata.
use "$path_data/completedata_analysis85-00.dta", clear
keep subj_id yr fnincome costfn_cost fninvest
* keep only rows with floating-net investment and a computable profit
drop if fninvest==0
gen fnprofit = fnincome-costfn_cost
drop if fnprofit==.
merge m:1 yr using "$path_data/cpi.dta"
drop _merge
* deflate by the CPI of the same year
gen fnprofit_real = fnprofit/cpi


bys yr : egen realprofitmean = mean(fnprofit_real)
gen fnincome_real = fnincome/cpi
gen incomemil = fnincome_real/1000000
bys yr : egen realincomemean = mean(incomemil)
gen fncost_real = costfn_cost/cpi
* yearly mean of real cost (previously averaged incomemil by mistake, which
* made realcostmean a copy of realincomemean and left fncost_real unused)
bys yr : egen realcostmean = mean(fncost_real)
bys yr : egen realincomesd = sd(incomemil)
* mean +/- 1.96 sd band of real income per year
gen upperbound = realincomemean + realincomesd * 1.96
gen lowerbound = realincomemean - realincomesd * 1.96


merge m:1 subj_id using "$path_data/fninvest_experience.dta"
* NOTE(review): the file is saved before the using-only rows are dropped, so
* fninvest_income.dta still contains them (and _merge) - confirm this is intended.
save "$path_data/fninvest_income.dta", replace
drop if _merge==2
* continuefn = 1 only when both fn8500 and fn0010 are 1
gen continuefn = fn8500*fn0010
label define confnlabel 1 "continue FN 1985-2010" 0 "exit before 2000"
label val continuefn confnlabel

* share of zero-income rows per year and continuation group
gen zeroincome = 1 if fnincome==0
recode zeroincome(.=0)
bys yr continuefn: egen zeronum = total(zeroincome)
gen nonzeroincome = 1-zeroincome
bys yr continuefn: egen nonzeronum = total(nonzeroincome)
gen zerorate = zeronum/(zeronum+nonzeronum)
bys yr continuefn: egen fnincome_status = mean(incomemil)
bys yr : egen fnnum = total(fninvest)
bys yr : egen activefnnum = total(fninvest*(1-zeroincome))

* NOTE(review): no variable named exactly fnunit is created in this file;
* presumably this resolves by abbreviation to a merged variable - confirm.
gen incomeperunit = realincomemean/fnunit

gen income85 = incomemil if yr == 1985
gen income97 = incomemil if yr == 1997
* only the two helper variables are zero-filled; the former wildcard income*
* also overwrote missing incomemil / incomeperunit values
recode income85 income97 (.=0)
bys subj_id : egen income_85 = total(income85)
bys subj_id : egen income_97 = total(income97)
gen trueincomechange = income_97/income_85 if income_85>0 & income_97>0

* one row per year, with fnincome_status split into columns by continuefn
collapse (mean) realincomemean fnincome_status fnnum activefnnum, by (yr continuefn)
reshape wide fnincome_status, i(yr) j(continuefn)
export delim using "$path_data/figuredata.csv", replace
